package com.shengzai.sql

import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}

/**
 * Word-count job expressed with the Spark SQL DataFrame DSL.
 *
 * Reads `data/worlds.txt`, splits every line on commas, counts how often each
 * token occurs, and writes the `(world, num)` result as CSV to `data/worldCount`.
 */
object Demo1WorldCountDSL {

  /**
   * Runs the job on a local Spark master and always stops the session afterwards.
   *
   * @param args command-line arguments; not read by this job
   */
  def main(args: Array[String]): Unit = {

    val spark: SparkSession = SparkSession
      .builder()
      .master("local")
      .appName("worldCount")
      // Limit the groupBy shuffle to a single partition.
      .config("spark.sql.shuffle.partitions", 1)
      .getOrCreate()

    try {
      import org.apache.spark.sql.functions._
      import spark.implicits._

      // The CSV separator is "|" while tokens are split on "," below, so the commas
      // stay inside the single `line` column.
      // NOTE(review): presumably the input lines contain no "|" — confirm against the data.
      val wordLine: DataFrame = spark
        .read
        .format("csv")
        .option("sep", "|")
        .schema("line String")
        .load("data/worlds.txt")

      // One output row per comma-separated token, then a count per distinct token.
      val res: DataFrame = wordLine
        .select(explode(split($"line", ",")) as "world")
        .groupBy($"world")
        .agg(count($"world") as "num")

      // Overwrite replaces any previous output at the target path.
      res.write
        .format("csv")
        .mode(SaveMode.Overwrite)
        .save("data/worldCount")
    } finally {
      // Release the session and its underlying SparkContext even when the job fails.
      spark.stop()
    }
  }

}
